
  ___  ____  ____  ____  ____ ®
 /__    /   ____/   /   ____/      18.0
___/   /   /___/   /   /___/       MP—Parallel Edition

 Statistics and Data Science       Copyright 1985-2023 StataCorp LLC
                                   StataCorp
                                   4905 Lakeway Drive
                                   College Station, Texas 77845 USA
                                   800-STATA-PC        https://www.stata.com
                                   979-696-4600        stata@stata.com

Stata license: 22-user 8-core network, expiring 30 Jun 2025
Serial number: 501809309427
  Licensed to: The University of Chicago Booth School of Business
               Mercury Computing Cluster

Notes:
      1. Stata is running in batch mode.
      2. Unicode is supported; see help unicode_advice.
      3. More than 2 billion observations are allowed; see help obs_advice.
      4. Maximum number of variables is set to 5,000 but can be increased;
          see help set_maxvar.

. do "/project/fagoolsb/git/service_industries/replication_package/exhibits/2_a
> dditional_analysis.do" 

. * Author: Joe Tatarka
. * Name: 2_additional_analysis.do
. * Last Updated Date: April 15, 2025
. * Purpose: Recreate the extra little statistics in the paper such as correlat
> ions and small data work with Homebase and the National Restaurant Associatio
> n 
. 
. * Set Global File Paths
. global original_raw_root = "/project/fagoolsb/service_industries/data/dataset
> s/raw"

. global raw_root = "/project/fagoolsb/service_industries/replication_package/d
> atasets/raw"

. global data_root = "/project/fagoolsb/service_industries/replication_package/
> datasets/built"

. global export_root = "/project/fagoolsb/service_industries/replication_packag
> e/exhibits/additional_analysis"

. 
. *******************
. **** 1. Compare SafeGraph with National Restaurant Association Data
. *******************
. ** Load in Data
. import excel "${raw_root}/national_restaurant_association/rpi_march_13_2024.x
> ls", clear
(18 vars, 615 obs)

. 
. ** Clean the data 
. 
. keep A E F G

. 
. rename A year_month

. rename E sales_index 

. rename F visits_index 

. rename G labor_index 

. 
. drop in 1/6
(6 observations deleted)

. 
. destring sales_index visits_index labor_index, replace 
sales_index: all characters numeric; replaced as double
(349 missing values generated)
visits_index: all characters numeric; replaced as double
(349 missing values generated)
labor_index: all characters numeric; replaced as double
(349 missing values generated)

. drop if year_month == ""
(349 observations deleted)

. 
. gen year_month_gs = monthly(substr(year_month,3,7),"MY")
(155 missing values generated)

. format year_month_gs %tm

. 
. keep if year_month_gs >= monthly("jan2019", "MY") & year_month_gs != .
(199 observations deleted)

. 
. drop year_month 

. order year_month_gs , first

. 
. * save as tempfile 
. tempfile nra_data 

. save `nra_data', replace
(file /scratch/jtatarka/14736713/St1322531.000001 not found)
file /scratch/jtatarka/14736713/St1322531.000001 saved as .dta format

. 
. *****************************************
. ***** Bring in all of the sg/visits data 
. 
. * use in 1/1000 using "${data_root}/main_build.dta", clear
. use "${data_root}/main_build.dta", clear

. 
. label variable year_month_gs "Year-month"

. keep placekey year_month_gs nominal_spend emp visits

. sort placekey year_month_gs

. 
. ** xtset 
. egen id = group(placekey)

. xtset id year_month_gs

Panel variable: id (unbalanced)
 Time variable: year_month_gs, 2019m1 to 2022m12, but with gaps
         Delta: 1 month

. 
. *** Create year lagged variables 
. gen lagged_spend = L12.nominal_spend
(1,907,647 missing values generated)

. gen lagged_emp = L12.emp
(1,907,647 missing values generated)

. gen lagged_visits = L12.visits
(1,907,647 missing values generated)

. 
. gen more_spend_dummy = (nominal_spend > lagged_spend)

. replace more_spend_dummy = . if (nominal_spend == . | lagged_spend == .)
(1,907,647 real changes made, 1,907,647 to missing)

. gen less_spend_dummy = (nominal_spend < lagged_spend)

. replace less_spend_dummy = . if (nominal_spend == . | lagged_spend == .)
(1,907,647 real changes made, 1,907,647 to missing)

. 
. gen more_emp_dummy = (emp > lagged_emp)

. replace more_emp_dummy = . if (emp == . | lagged_emp == .)
(1,907,647 real changes made, 1,907,647 to missing)

. gen less_emp_dummy = (emp < lagged_emp)

. replace less_emp_dummy = . if (emp == . | lagged_emp == .)
(1,907,647 real changes made, 1,907,647 to missing)

. 
. gen more_visits_dummy = (visits > lagged_visits)

. replace more_visits_dummy = . if (visits == . | lagged_visits == .)
(1,907,647 real changes made, 1,907,647 to missing)

. gen less_visits_dummy = (visits < lagged_visits)

. replace less_visits_dummy = . if (visits == . | lagged_visits == .)
(1,907,647 real changes made, 1,907,647 to missing)

. 
. collapse (mean) more_spend_dummy more_visits_dummy more_emp_dummy less_spend_
> dummy less_visits_dummy less_emp_dummy, by(year_month_gs)

. 
. drop if more_spend_dummy ==. | more_visits_dummy == . | more_emp_dummy == .
(12 observations deleted)

. 
. ** Create same indexes as National Restaurant Association 
. gen sg_sales_index = 10*(more_spend_dummy - less_spend_dummy) + 100

. 
. gen sg_visits_index = 10*(more_visits_dummy - less_visits_dummy) + 100

. 
. gen sg_emp_index = 10*(more_emp_dummy - less_emp_dummy) + 100

. 
. *****************************************************************************
. **** Merge with NRA data 
. 
. merge 1:1 year_month_gs using `nra_data', nogen keep(1 3)

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                                36  
    -----------------------------------------

. 
. *****************************************************************************
. *** Make Graphs 
. label var year_month_gs "Year-Month"

. 
. *** We want to examine the monthly differences between the indexes, and show 
> correlation 
. * create month to month differences: 
. 
. tsset year_month_gs

Time variable: year_month_gs, 2020m1 to 2022m12
        Delta: 1 month

. foreach var of varlist sales_index visits_index labor_index  {
  2.         gen diff_`var' = D.`var'
  3.         label var diff_`var' "NRA"
  4. }
(1 missing value generated)
(1 missing value generated)
(1 missing value generated)

. 
. foreach var of varlist sg_sales_index sg_visits_index sg_emp_index {
  2.         gen diff_`var' = D.`var'
  3.         label var diff_`var' "Safegraph"
  4. }
(1 missing value generated)
(1 missing value generated)
(1 missing value generated)

. 
. ****** Graph and find correlation coefficient 
. corr diff_sales_index diff_sg_sales_index
(obs=35)

             | diff_sa~ diff_s..
-------------+------------------
diff_sales~x |   1.0000
diff_sg_sa~x |   0.4656   1.0000


. local correlation = r(C)[2,1]

. local fmtcorr : display %4.3f `correlation'

. graph twoway line diff_sales_index diff_sg_sales_index year_month_gs, title("
> Same-Store Sales Index, Monthly Change") ytitle(Change in Index from Previous
>  Month) subtitle("Correlation Coefficient: `fmtcorr'") ylabel(-15(5)15) legen
> d(cols(2) position(6))

. graph export "${export_root}/1_1_nra_sales_diff.pdf", replace
file
    /project/fagoolsb/service_industries/replication_package/exhibits/additio
    > nal_analysis/1_1_nra_sales_diff.pdf saved as PDF format

. 
. corr diff_visits_index diff_sg_visits_index
(obs=35)

             | diff_v~x diff_s..
-------------+------------------
diff_visit~x |   1.0000
diff_sg_vi~x |   0.7816   1.0000


. local correlation = r(C)[2,1]

. local fmtcorr : display %4.3f `correlation'

. graph twoway line diff_visits_index diff_sg_visits_index year_month_gs, title
> ("Same-Store Customer Traffic Index, Monthly Change") ytitle(Change in Index 
> from Previous Month) subtitle("Correlation Coefficient: `fmtcorr'") ylabel(-1
> 5(5)15) legend(cols(2) position(6))

. graph export "${export_root}/1_2_nra_visits_diff.pdf", replace
file
    /project/fagoolsb/service_industries/replication_package/exhibits/additio
    > nal_analysis/1_2_nra_visits_diff.pdf saved as PDF format

. 
. corr diff_labor_index diff_sg_emp_index
(obs=35)

             | diff_l~x d~emp_~x
-------------+------------------
diff_labor~x |   1.0000
diff_sg_em~x |   0.6893   1.0000


. local correlation = r(C)[2,1]

. local fmtcorr : display %4.3f `correlation'

. graph twoway line diff_labor_index diff_sg_emp_index year_month_gs, title("Sa
> me-Store Labor Index, Monthly Change") ytitle(Change in Index from Previous M
> onth) subtitle("Correlation Coefficient: `fmtcorr'") ylabel(-15(5)15) legend(
> cols(2) position(6))

. graph export "${export_root}/1_3_nra_emp_diff.pdf", replace
file
    /project/fagoolsb/service_industries/replication_package/exhibits/additio
    > nal_analysis/1_3_nra_emp_diff.pdf saved as PDF format

. 
. ********************************************
. ***** 2. Homebase Analysis 
. ********************************************
. *** Homebase is a employee scheduling program, we use their data to see the a
> verage length of restaurant employee shifts. 
. *** Small caveat: We originally had a contract with Homebase to use their dat
> a going to July of 2022
. *** I reached out to them and they gave me access to their data going to pres
> ent day but they never signed the follow up contract. I am going to use their
>  data going from Jan 2019 - Dec 2022.
. 
. ** Get data from Homebase Limited service eating places
. use if inlist(naics_code, 722513, 722514, 722515) using "${original_raw_root}
> /homebase/homebase_2018_2022.dta", clear

. 
. ** Do some cleaning
. * drop if county_code is missing or if establishment is in puerto rico
. drop if county_code == ""
(664,345 observations deleted)

. drop if state == "pr"
(1,243,990 observations deleted)

. 
. * drop if no hours worked or missing hours worked
. drop if hours_worked == 0 | hours_worked == .
(146,266 observations deleted)

. 
. *** Create some key variables 
. gen year_month_gs = ym(year(event_date), month(event_date))

. format year_month_gs %tm

. 
. gen shift_less_4 = (hours_worked <4)

. 
. ********************************* 
. ** Monthly Level graphs
. gen less_4_hours = hours_worked if hours_worked <4
(59,843,156 missing values generated)

. 
. gen agg_hours = hours_worked

. 
. collapse (mean) hours_worked shift_less_4 (sum) agg_hours less_4_hours, by(ye
> ar_month_gs)

. 
. gen agg_hours_share = less_4_hours/agg_hours

. label var agg_hours_share "Share of Agg Hours from Shifts Less than 4 Hours"

. label var hours_worked "Avg Shift Length"

. label var agg_hours "Aggregate Hours"

. label var shift_less_4 "Share of Shifts less than 4 hours"

. label var year_month_gs "Year-Month"

. 
. replace agg_hours = agg_hours/1000000 // put into millions
(60 real changes made)

. 
. graph twoway line hours_worked year_month_gs, title("1. Avg Shift Length") su
> btitle("Limited-Service") ytitle("Avg Shift Length") ylabel(0(2)8)

. graph export "${export_root}/2_1_homebase.pdf", replace
file
    /project/fagoolsb/service_industries/replication_package/exhibits/additio
    > nal_analysis/2_1_homebase.pdf saved as PDF format

. 
. graph twoway line shift_less_4 year_month_gs, title("2. Share of Shifts Less 
> than 4 Hours") subtitle("Limited-Service") ytitle("Share of Shifts less than 
> 4 Hours") ylabel(0(0.05)0.2)

. graph export "${export_root}/2_2_homebase.pdf", replace
file
    /project/fagoolsb/service_industries/replication_package/exhibits/additio
    > nal_analysis/2_2_homebase.pdf saved as PDF format

. 
. graph twoway line agg_hours_share year_month_gs , title("3. Share of Agg hour
> s from <4 hr shifts") subtitle("Limited-Service") ytitle("Share of Agg hours 
> from <4 hr shifts") ylabel(0(0.01)0.06)

. graph export "${export_root}/2_3_homebase.pdf", replace
file
    /project/fagoolsb/service_industries/replication_package/exhibits/additio
    > nal_analysis/2_3_homebase.pdf saved as PDF format

. 
. ***********************
. *** 3. Checking Correlations
. ***********************
. **** Check Year-over-year growth rate correlation of Figure A1, industry vs l
> imited service real sales per employee 
. use "${data_root}/aggregates_build.dta", clear 

. tsset year_month_gs

Time variable: year_month_gs, 1992m1 to 2024m12
        Delta: 1 month

. 
. sort year_month_gs

. foreach var of varlist prod_food_nsa prod_limited_food_cpi  {
  2.         gen `var'_y_g = 100*(`var' -l12.`var')/l12.`var'
  3. }
(14 missing values generated)
(14 missing values generated)

. *** Industry Productivity and Limited Service Productivity, Not Seasonally Ad
> justed
. corr prod_food_nsa_y_g prod_limited_food_cpi_y_g
(obs=382)

             | prod_f~g prod_l~g
-------------+------------------
prod_food_~g |   1.0000
prod_limit~g |   0.8260   1.0000


. 
. ****** Correlations of Monthly Growth Rates of comparisons of SafeGraph Aggre
> gates and BLS/Census Aggregates
. sort year_month_gs

. foreach var of varlist spend_sg visits_sg emp_sg emp_limited spend_limited {
  2.         gen `var'_g = 100*(`var' -l.`var')/l.`var'
  3. }
(349 missing values generated)
(349 missing values generated)
(349 missing values generated)
(3 missing values generated)
(73 missing values generated)

. 
. ********* Monthly growth rates correlations 
. ** SG Visits and Census Sales
. corr visits_sg_g spend_limited_g 
(obs=47)

             | visit~_g spen~d_g
-------------+------------------
 visits_sg_g |   1.0000
spend_limi~g |   0.8784   1.0000


. * SG Sales and Census Sales
. corr spend_sg_g spend_limited_g
(obs=47)

             | spen~g_g spen~d_g
-------------+------------------
  spend_sg_g |   1.0000
spend_limi~g |   0.7225   1.0000


. * SG Emp and BLS Emp
. corr emp_sg_g emp_limited_g
(obs=47)

             | emp_sg_g emp_li~g
-------------+------------------
    emp_sg_g |   1.0000
emp_limite~g |   0.6317   1.0000


. 
. ******************************
. **** Entry and Exit
. ******************************
. ** Compare SG Sample Entry and Exit to Census Business Dynamic Statistics (BD
> S) annual rates
. import delimited "${raw_root}/business_dynamic_statistics/bds2022_vcn4.csv", 
> clear
(encoding automatically selected: ISO-8859-1)
(26 vars, 12,960 obs)

. keep if vcnaics4 == 7225
(12,915 observations deleted)

. keep if year >= 2018
(40 observations deleted)

. keep year vcnaics4 estabs estabs_entry estabs_entry_rate estabs_exit estabs_e
> xit_rate

. destring year vcnaics4 estabs estabs_entry estabs_entry_rate estabs_exit esta
> bs_exit_rate, replace
year already numeric; no replace
vcnaics4 already numeric; no replace
estabs already numeric; no replace
estabs_entry: all characters numeric; replaced as long
estabs_entry_rate: all characters numeric; replaced as double
estabs_exit: all characters numeric; replaced as long
estabs_exit_rate: all characters numeric; replaced as double

. 
. tempfile entry_exit

. save `entry_exit', replace 
(file /scratch/jtatarka/14736713/St1322531.000002 not found)
file /scratch/jtatarka/14736713/St1322531.000002 saved as .dta format

. 
. ****** Load in SG Data 
. *use in 1/1000 using "${data_root}/main_build.dta", clear 
. use placekey year_month_gs using "${data_root}/main_build.dta", clear 

. gen year = 1960 + floor(year_month_gs/12)

. bys placekey year: drop if _n != 1
(5,154,123 observations deleted)

. gen num_poi = 1

. 
. fillin placekey year

. sort placekey year

. bys placekey: gen exit = 1 if num_poi[_n] == . & num_poi[_n-1] != .
(610,977 missing values generated)

. bys placekey: gen entry = 1 if num_poi[_n] != . & num_poi[_n-1] == .
(460,367 missing values generated)

. 
. collapse (sum) num_poi entry exit, by(year) 

. 
. ** create entry and exit rates rates following how BDS does it
. tsset year

Time variable: year, 2019 to 2022
        Delta: 1 unit

. gen sg_exit_rate = 100*exit/((num_poi + l.num_poi)/2)
(1 missing value generated)

. gen sg_entry_rate = 100*entry/((num_poi + l.num_poi)/2)
(1 missing value generated)

. 
. ** merge with BDS entry and exit
. merge 1:1 year using `entry_exit', nogen 

    Result                      Number of obs
    -----------------------------------------
    Not matched                             1
        from master                         0  
        from using                          1  

    Matched                                 4  
    -----------------------------------------

. 
. sort year

. keep year sg_entry_rate sg_exit_rate estabs_entry_rate estabs_exit_rate

. drop if year == 2018
(1 observation deleted)

. 
. * Look at average of rates for 2020-2022
. sum sg_entry_rate estabs_entry_rate sg_exit_rate estabs_exit_rate if year >= 
> 2020

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
sg_entry_r~e |          3    6.073953    2.575866   4.293992   9.027641
estabs_ent~e |          3    10.33033    .9930425      9.757     11.477
sg_exit_rate |          3    2.030456    .5215016   1.573708   2.598682
estabs_exi~e |          3    9.358333    1.005883      8.329     10.339

. 
. 
. **************************
. **** Employees per Establishment - Census County Business Patterns 
. 
. ** 2019
. import delimited "${raw_root}/county_business_patterns/cbp19co.txt", clear
(encoding automatically selected: UTF-8)
(25 vars, 1,085,472 obs)

. 
. keep if substr(naics,1,4) == "7225"
(1,071,877 observations deleted)

. collapse (sum) emp est, by(naics)

. gen emp_per_est = emp/est

. sum emp_per_est if naics == "7225//"

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
 emp_per_est |          1    19.01272           .   19.01272   19.01272

. 
. ** 2022 
. import delimited "${raw_root}/county_business_patterns/cbp22co.txt", clear
(encoding automatically selected: UTF-8)
(23 vars, 1,100,804 obs)

. 
. keep if substr(naics,1,4) == "7225"
(1,086,950 observations deleted)

. collapse (sum) emp est, by(naics)

. gen emp_per_est = emp/est

. sum emp_per_est if naics == "7225//"

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
 emp_per_est |          1    17.91832           .   17.91832   17.91832

. 
. ** 2023 data will be released in summer of 2025
. 
. 
end of do-file


. 